// Anime4K_Upscale_Denoise_CNN_x2_L
// 移植自 https://github.com/bloc97/Anime4K/blob/master/glsl/Upscale%2BDenoise/Anime4K_Upscale_Denoise_CNN_x2_L.glsl

//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_Upscale_Denoise_1


//!TEXTURE
Texture2D INPUT;

//!TEXTURE
//!WIDTH INPUT_WIDTH * 2
//!HEIGHT INPUT_HEIGHT * 2
Texture2D OUTPUT;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex1;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex2;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex3;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex4;

//!SAMPLER
//!FILTER POINT
SamplerState sam;

//!SAMPLER
//!FILTER LINEAR
SamplerState sam1;


//!PASS 1
//!DESC Conv-4x3x3x3
//!IN INPUT
//!OUT tex1, tex2
//!BLOCK_SIZE 16
//!NUM_THREADS 64

void Pass1(uint2 blockStart, uint3 threadId) {
	uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart;
	uint2 inputSize = GetInputSize();
	if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) {
		return;
	}
	float2 inputPt = GetInputPt();

	uint i, j;

	float3 src[4][4];
	[unroll]
	for (i = 0; i <= 2; i += 2) {
		[unroll]
		for (j = 0; j <= 2; j += 2) {
			float2 tpos = (gxy + uint2(i, j)) * inputPt;
			const float4 sr = INPUT.GatherRed(sam, tpos);
			const float4 sg = INPUT.GatherGreen(sam, tpos);
			const float4 sb = INPUT.GatherBlue(sam, tpos);

			// w z
			// x y
			src[i][j] = float3(sr.w, sg.w, sb.w);
			src[i][j + 1] = float3(sr.x, sg.x, sb.x);
			src[i + 1][j] = float3(sr.z, sg.z, sb.z);
			src[i + 1][j + 1] = float3(sr.y, sg.y, sb.y);
		}
	}

	[unroll]
	for (i = 1; i <= 2; ++i) {
		[unroll]
		for (j = 1; j <= 2; ++j) {
			uint2 destPos = gxy + uint2(i - 1, j - 1);

			if (i != 1 || j != 1) {
				if (destPos.x >= inputSize.x || destPos.y >= inputSize.y) {
					continue;
				}
			}

			float4 target1 = mul(src[i - 1][j - 1], float3x4(-0.050913796, -0.05115213, -0.0205767, -0.26266688, -0.12883802, 0.107968464, 0.03389763, -0.70179373, 0.0030511466, 0.07718592, -0.06562523, -0.060305536));
			target1 += mul(src[i - 1][j], float3x4(0.009235469, -0.018979615, 0.10033019, -0.20307243, 0.040932532, -0.10095427, 0.038843542, -0.28774044, -0.07829864, -0.04317961, 0.032555006, -0.05584433));
			target1 += mul(src[i - 1][j + 1], float3x4(0.23774138, 0.04701499, -0.16824278, 0.025335955, 0.30246395, -0.037289508, 0.070405066, 0.03094164, -0.0075012813, 0.06881163, -0.03157643, -0.032394916));
			target1 += mul(src[i][j - 1], float3x4(-0.12524955, 0.18535072, -0.05323482, 0.004486272, 0.15295836, 0.3050709, 0.081431866, 0.09352846, -0.059866652, -0.029570978, 0.019920588, 0.121749535));
			target1 += mul(src[i][j], float3x4(-0.2111615, -0.1268416, 0.45642895, 0.47401953, -0.7580866, 0.5514855, 0.96250856, 0.7827129, 0.0003978912, 0.17167407, -0.04423575, -0.04569368));
			target1 += mul(src[i][j + 1], float3x4(0.17050457, -0.18697786, -0.11608587, -0.038065948, 0.26542, -0.7021022, -0.33751717, 0.053689335, 0.10030526, -0.19492362, 0.069387496, 0.07228368));
			target1 += mul(src[i + 1][j - 1], float3x4(0.15900351, -0.017636139, 0.01917807, 0.05584281, 0.28530255, 0.04795445, -0.104170926, 0.1192509, 0.09859251, 0.057123564, 0.025724344, -0.07723904));
			target1 += mul(src[i + 1][j], float3x4(-0.06581913, 0.07548721, -0.054552317, -0.08317343, 0.32851526, -0.2362575, -0.39470714, -0.073999345, 0.07246812, -0.04103072, 0.06058696, 0.09532553));
			target1 += mul(src[i + 1][j + 1], float3x4(-0.12524493, 0.095179625, -0.0918538, 0.016793616, -0.48433152, 0.03702525, -0.100864686, -0.0018861603, -0.14784335, -0.048320837, -0.057494648, -0.024096634));
			target1 += float4(-0.012922576, -0.11982956, 0.021963459, 0.019259451);

			float4 target2 = mul(src[i - 1][j - 1], float3x4(0.04816902, 0.030087546, 0.019183155, -0.08234757, 0.09378316, -0.047217257, -0.04757087, -0.16541782, -0.043394983, 0.05779227, 0.018105166, 0.03222583));
			target2 += mul(src[i - 1][j], float3x4(0.13639967, -0.001877575, 0.049495522, 0.060094353, 0.015303669, 0.059043188, 0.090356335, -0.12654372, 0.06469071, -0.054733396, -0.013548386, -0.093697555));
			target2 += mul(src[i - 1][j + 1], float3x4(-0.13214277, 0.00062924915, -0.640379, -0.052121993, -0.022532608, 0.01077454, -0.057074178, -0.103670195, -0.0017062012, 0.0035225085, -0.044859786, -0.020764757));
			target2 += mul(src[i][j - 1], float3x4(0.2553945, -0.08126201, 0.055215932, 0.10690791, 0.6771195, 0.09377514, -0.09488318, -0.43969935, 0.35444704, -0.10392259, 0.07595239, 0.021814484));
			target2 += mul(src[i][j], float3x4(-0.37628967, 0.026895085, 0.035044484, -0.16414654, -0.5694931, -0.20123884, 0.14891861, 1.1822934, -0.25648627, 0.14110301, -0.057699542, 0.17731132));
			target2 += mul(src[i][j + 1], float3x4(0.023089241, 0.14888923, -0.2730167, 0.1330048, -0.039043408, 0.75768983, 0.07385114, 0.0138615575, -0.06565686, 0.10451973, 0.037489507, 0.021156311));
			target2 += mul(src[i + 1][j - 1], float3x4(0.03965048, 0.040422294, -0.0662493, -0.043219455, 0.00834316, -0.08315282, 0.13010995, -0.11822414, -0.06811034, 0.029744523, -0.098641835, -0.063671604));
			target2 += mul(src[i + 1][j], float3x4(-0.077282995, -0.29400682, 0.116103284, 0.096747644, -0.47398612, -0.77101594, -0.20683232, 0.111703634, -0.08370965, -0.24218678, 0.13780457, -0.017660126));
			target2 += mul(src[i + 1][j + 1], float3x4(0.08542605, 0.13080615, 0.081582755, -0.00024888176, 0.31160986, 0.17787197, -0.019935975, -0.09658498, 0.096656196, 0.064402744, -0.033331197, 0.027531069));
			target2 += float4(-0.0018859988, 0.004285429, 0.5060845, -0.030093472);

			tex1[destPos] = target1;
			tex2[destPos] = target2;
		}
	}
}


//!PASS 2
//!DESC Conv-4x3x3x16
//!IN tex1, tex2
//!OUT tex3, tex4
//!BLOCK_SIZE 8
//!NUM_THREADS 64

void Pass2(uint2 blockStart, uint3 threadId) {
	uint2 gxy = Rmp8x8(threadId.x) + blockStart;
	uint2 inputSize = GetInputSize();
	if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) {
		return;
	}

	float2 inputPt = GetInputPt();
	float2 pos = (gxy + 0.5f) * inputPt;

	// [ a, d, g ]
	// [ b, e, h ]
	// [ c, f, i ]
	float4 a1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	float4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	float4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	float4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	float4 e1 = tex1.SampleLevel(sam, pos, 0);
	float4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	float4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	float4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	float4 i1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	float4 na1 = max(-a1, 0);
	float4 nb1 = max(-b1, 0);
	float4 nc1 = max(-c1, 0);
	float4 nd1 = max(-d1, 0);
	float4 ne1 = max(-e1, 0);
	float4 nf1 = max(-f1, 0);
	float4 ng1 = max(-g1, 0);
	float4 nh1 = max(-h1, 0);
	float4 ni1 = max(-i1, 0);

	a1 = max(a1, 0);
	b1 = max(b1, 0);
	c1 = max(c1, 0);
	d1 = max(d1, 0);
	e1 = max(e1, 0);
	f1 = max(f1, 0);
	g1 = max(g1, 0);
	h1 = max(h1, 0);
	i1 = max(i1, 0);

	float4 a2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	float4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	float4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	float4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	float4 e2 = tex2.SampleLevel(sam, pos, 0);
	float4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	float4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	float4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	float4 i2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	float4 na2 = max(-a2, 0);
	float4 nb2 = max(-b2, 0);
	float4 nc2 = max(-c2, 0);
	float4 nd2 = max(-d2, 0);
	float4 ne2 = max(-e2, 0);
	float4 nf2 = max(-f2, 0);
	float4 ng2 = max(-g2, 0);
	float4 nh2 = max(-h2, 0);
	float4 ni2 = max(-i2, 0);

	a2 = max(a2, 0);
	b2 = max(b2, 0);
	c2 = max(c2, 0);
	d2 = max(d2, 0);
	e2 = max(e2, 0);
	f2 = max(f2, 0);
	g2 = max(g2, 0);
	h2 = max(h2, 0);
	i2 = max(i2, 0);

	float4 target1 = mul(a1, float4x4(0.34559122, 0.052896723, -0.27492252, -0.1604473, 0.4791457, 0.17956258, 0.0076199574, -0.16324736, -0.075430416, 0.019434236, -0.275363, -0.16502565, 0.05507322, -0.046572465, 0.08130956, 0.009380191));
	target1 += mul(b1, float4x4(0.1754505, 0.10862336, -0.14956018, 0.20161937, 0.16598102, -0.0033441933, 0.19303258, 0.3278992, -0.31819978, 0.14614153, 0.08434212, 0.21208692, -0.0014794758, -0.06754758, -0.06314527, 0.023496931));
	target1 += mul(c1, float4x4(0.13594365, -0.06382366, -0.40069854, -0.087743916, 0.022426397, -0.073364444, -0.19371308, 0.09916138, -0.044016927, 0.0018689828, -0.07705671, 0.15398589, -0.069929935, -0.01874144, 0.050793763, 0.06565281));
	target1 += mul(d1, float4x4(0.56292456, 0.25537506, -0.16147509, 0.029484648, 0.11898947, 0.19103922, -0.2387553, 0.13659279, -0.044804625, -0.10285909, 0.12958583, 0.21526133, 0.02727471, 0.21990417, 0.0009558564, 0.12372512));
	target1 += mul(e1, float4x4(-0.10264466, -0.13103753, -0.069214605, 0.43234769, 0.25947884, -0.18333039, -0.15585582, -0.2406589, 0.33275372, -0.19497354, -0.09758474, -0.4531396, 0.41932744, -0.043746196, 0.08315102, -0.085604236));
	target1 += mul(f1, float4x4(0.15380725, -0.06311845, -0.28896615, -0.059237756, -0.078456834, -0.11623796, 0.017248835, 0.098803006, -0.13643564, -0.0029720776, 0.425954, 0.36920592, -0.06980546, 0.05205535, -0.15787347, -0.094921984));
	target1 += mul(g1, float4x4(0.009595518, -0.12598279, -0.04322495, -0.08838463, 0.11729769, -0.062454883, 0.19743776, -0.08590505, -0.022744715, 0.00457582, -0.06070008, 0.045312855, -0.010845991, -0.02241941, 0.07252932, 0.05525124));
	target1 += mul(h1, float4x4(-0.119069465, 0.08782395, 0.17878884, 0.0068233046, -0.36698806, -0.46077076, 0.37470114, 0.006550318, 0.08622002, -0.10081386, 0.1754186, 0.078841425, 0.060330488, 0.39436886, 0.1688179, -0.10113108));
	target1 += mul(i1, float4x4(0.17160045, -0.18541232, -0.093926296, 0.0053854887, -0.07649591, -0.3053692, 0.15255369, 0.06183564, 0.105131835, 0.076607525, -0.17482935, -0.104579754, -0.4795174, 0.30223432, 0.4728322, 0.106419675));
	target1 += mul(a2, float4x4(-0.068794325, -0.019651407, 0.048906703, 0.10097784, 0.014003637, 0.08358555, -0.34008583, 0.1677446, 0.12863056, 0.010167976, 0.10771957, -0.14823496, -0.11855097, 0.024728613, -0.06394353, 0.07123295));
	target1 += mul(b2, float4x4(0.1652107, -0.056815207, 0.26562792, -0.02586732, 0.13812682, 0.3791579, -0.40067768, 0.19901459, -0.055583958, 0.06673556, -0.16258197, 0.0014027074, 0.13844898, 0.17588624, 0.0061608437, 0.014889389));
	target1 += mul(c2, float4x4(0.023591522, -0.06255483, -0.04512753, -0.07939918, 0.17603582, -0.06219873, -0.10907254, 0.012348696, -0.053350568, 0.023741387, 0.05215983, 0.117241465, 0.28173143, 0.11200327, -0.11672438, -0.13278063));
	target1 += mul(d2, float4x4(-0.15015969, -0.1145909, 0.08583166, 0.0386507, -0.17788467, 0.29311427, 0.03577728, -0.006737705, -0.020426478, 0.065881886, -0.10966947, -0.016716056, -0.0027577002, -0.20769168, 0.4357363, -0.13179652));
	target1 += mul(e2, float4x4(-0.44572783, 0.08870803, 0.42933974, -0.16602941, 0.23271243, 0.29478154, -0.53973556, -0.042550746, -0.13157314, -0.0413034, 0.12679552, 0.11579286, -0.5161936, -0.24292113, -0.10862491, 0.13528119));
	target1 += mul(f2, float4x4(-0.043000877, 0.08458555, 0.11260604, -0.5589381, -0.16010836, -0.019429926, 0.04731505, -0.12212733, 0.05655828, 0.0107375225, -0.10067243, -0.06904067, 0.07476142, -0.043922618, -0.13811466, 0.008697587));
	target1 += mul(g2, float4x4(-0.3281664, -0.104251154, 0.07188181, 0.06720938, 0.028879764, 0.07302547, 0.18261562, -0.08896491, 0.11240943, -0.1919612, -0.13059135, -0.07057044, 0.053953633, 0.17297988, -0.20344415, 0.050276734));
	target1 += mul(h2, float4x4(-0.41925356, 0.020309223, 0.2246313, -0.3418901, -0.20863962, 0.18653068, -0.04616101, 0.1236236, -0.062179156, 0.1437903, 0.1314142, 0.0699381, 0.029918872, 0.23033592, 0.09302733, -0.20570321));
	target1 += mul(i2, float4x4(0.07847491, -0.18251555, 0.0678772, -0.29089385, -0.03632992, -0.17132603, -0.04896196, 0.09839614, -0.10377483, -0.11817732, 0.03477946, 0.050376516, 0.17791937, -0.34359503, 0.030756304, 0.025246387));
	target1 += mul(na1, float4x4(-0.12972409, 0.032459006, -0.20415276, 0.31407776, -0.1743501, -0.26177478, -0.07577315, -0.104599, -0.025548192, -0.23483936, 0.40139225, 0.12898883, 0.06533049, -0.09545806, -0.032093894, 0.0032956926));
	target1 += mul(nb1, float4x4(0.22749326, -0.20613275, -0.23030083, -0.29994026, -0.18482473, -0.038720988, -0.13339107, -0.1394514, 0.36952803, -0.2709558, -0.14104684, -0.17859542, 0.09873891, 0.04330318, 0.15205383, 0.115995236));
	target1 += mul(nc1, float4x4(0.07534328, -0.13592403, 0.2224819, -0.06818886, -0.11952144, 0.004714797, 0.18252324, -0.08729513, 0.17198865, -0.00082568696, 0.33769485, -0.0920225, 0.173712, -0.038548574, -0.016980015, -0.13799237));
	target1 += mul(nd1, float4x4(-0.43659294, -0.19679698, -0.31969583, 0.24002865, -0.1064947, -0.08218358, -0.07990568, -0.028915526, -0.077836946, -0.012841249, -0.11685068, -0.2102985, 0.025435956, -0.21367492, 0.11001358, -0.09812692));
	target1 += mul(ne1, float4x4(0.28203383, 0.09570471, -0.14503846, -0.19898729, 0.18757457, 0.16626704, -0.009997161, 0.06738176, -0.18296066, 0.11583831, -0.0025225005, 0.373547, -0.24103725, 0.3553009, 0.11984093, 0.25370696));
	target1 += mul(nf1, float4x4(-0.022194814, 0.02950222, -0.121312395, 0.0040648654, 0.06509207, 0.00084966415, 0.032229617, 0.0139804585, -0.23108627, -0.004511493, -0.28217104, 0.0828633, 0.17399071, 0.2137328, 0.4731738, -0.37666738));
	target1 += mul(ng1, float4x4(-0.045961298, 0.0056297607, -0.08513672, 0.093939304, 0.07252928, -0.11458939, 0.11005008, -0.1132733, 0.10369599, 0.1636998, -0.11919379, -0.08949099, 0.080640145, 0.029493907, 0.24982096, -0.10234766));
	target1 += mul(nh1, float4x4(0.08474163, -0.24252129, -0.3065911, 0.11077523, 0.13397239, 0.14875948, -0.18212163, 0.006510455, -0.008477232, -0.3242149, 0.31507346, -0.19521071, -0.3610268, 0.25882444, -0.067812346, 0.20968717));
	target1 += mul(ni1, float4x4(0.05730163, 0.053821165, -0.10948745, 0.04090055, 0.0161064, 0.19475192, 0.09248433, -0.027268974, -0.031323943, -0.084304914, 0.28378648, 0.44910806, -0.052243132, 0.2999386, -0.26639074, -0.2529396));
	target1 += mul(na2, float4x4(0.026707547, -0.006487042, -0.044127557, -0.016287267, 0.1417188, 0.24645403, -0.32444936, 0.20339565, 0.027596464, 0.03799474, -0.029943593, 0.058569513, -0.15013286, 0.25070968, 0.08954207, -0.14304538));
	target1 += mul(nb2, float4x4(-0.22184753, -0.0732679, 0.042815078, 0.03770516, 0.22240163, -0.043244008, -0.14883384, -0.10682856, 0.16421252, 0.20890577, 0.000585579, -0.061031006, -0.551696, -0.17770186, 0.13795924, 0.101121314));
	target1 += mul(nc2, float4x4(-0.047539327, 0.11826275, 0.458172, -0.023809819, -0.0154842585, -0.015466883, 0.03837829, -0.34703115, -0.03437818, 0.12705797, -0.042713646, -0.2518409, -0.27947584, -0.020104226, -0.022687877, 0.14169087));
	target1 += mul(nd2, float4x4(0.06269709, 0.06449363, -0.02793847, 0.04407663, -0.054694284, 0.69776016, -0.32850045, 0.19365972, -0.19002354, -0.038244195, -0.20433429, -0.34071165, 0.123992935, -0.22218247, -0.30181807, -0.03031556));
	target1 += mul(ne2, float4x4(-0.06685185, -0.18313402, -0.03785641, 0.008412995, -0.017108139, 0.48937285, -0.035302214, 0.011338532, -0.08890957, 0.32343447, 0.088812076, -0.027280344, 0.40437454, -0.45940742, 0.118888274, 0.41054434));
	target1 += mul(nf2, float4x4(-0.36049488, 0.100708134, 0.331516, 0.1078647, 0.12895954, 0.13425021, -0.18602797, -0.11423174, -0.10916294, 0.061013293, 0.08984191, 0.1835112, -0.10568929, -0.046648484, 0.2127872, 0.54582083));
	target1 += mul(ng2, float4x4(0.19040897, 0.08670264, 0.12393752, -0.003475547, -0.37210098, 0.035628326, -0.29302806, 0.10709011, -0.20405664, -0.9748058, 0.39254782, 0.44914797, 0.032028764, 0.04227575, -0.25056216, 0.063437305));
	target1 += mul(nh2, float4x4(-0.07952942, -0.13263832, 0.037877183, 0.20845042, -0.026445981, -0.010450352, -0.043147005, -0.12033961, 0.20600243, -0.046332583, -0.47056386, 0.09566825, 0.18658772, -0.3381639, -0.042662457, 0.15197653));
	target1 += mul(ni2, float4x4(-0.4996296, 0.019971728, 0.10017604, 0.052051116, 0.12145858, 0.106811635, -0.056665674, -0.11708303, 0.16642408, 0.22654046, -0.04731226, -0.039967895, -0.1434505, 0.3171998, -0.19033776, -0.29952875));
	target1 += float4(0.03144068, -0.027781913, 0.04483475, 0.037489943);
	
	float4 target2 = mul(a1, float4x4(-0.031192884, -0.015032417, 0.25046152, 0.143142, 0.09429096, 0.2090414, -0.16252424, 0.42788, -0.005667558, 0.14787567, 0.23810932, -0.13502707, 0.0006289761, -0.014052179, -0.091041535, 0.059258565));
	target2 += mul(b1, float4x4(-0.09637771, 0.17332087, 0.123664804, 0.046110056, 0.25775972, 0.31647265, -0.1464598, 0.41624358, 0.032242253, -0.017219262, -0.35814875, 0.3348811, 0.05738627, 0.046910666, 0.014263179, -0.15797907));
	target2 += mul(c1, float4x4(-0.06782952, 0.049666278, 0.083296575, 0.19301543, -0.05964988, 0.18332662, 0.30906975, 0.03342819, 0.12226727, 0.1226969, -0.15035193, -0.003493911, -0.007647415, -0.051491078, -0.019189527, -0.009602449));
	target2 += mul(d1, float4x4(0.08838342, -0.055376932, 0.13949814, -0.12728734, -0.17266448, 0.35102528, 0.018773714, 0.050504927, -0.10556112, -0.014422574, -0.25474203, 0.31192264, -0.09063805, 0.010115312, -0.08702192, 0.08573518));
	target2 += mul(e1, float4x4(0.16521221, -0.01265248, -0.5292306, -0.17494588, -0.18994644, -0.41904125, -0.26261392, -0.42338082, 0.39478812, 0.20768805, 0.16483486, -0.22635488, 0.13576357, 0.17095351, 0.064293, 0.06416031));
	target2 += mul(f1, float4x4(-0.09107591, 0.1757355, 0.19841582, -0.25249094, 0.18083812, -0.12258315, 0.4074544, -0.17171176, -0.15881093, -0.22978021, -0.05622591, -0.09703007, -0.12538208, -0.06956953, -0.14475612, -0.066342294));
	target2 += mul(g1, float4x4(-0.029294115, -0.036292624, 0.19467807, -0.10223533, 0.086430565, -0.052809026, -0.23749635, 0.10364248, -0.22938702, 0.07210543, 0.03876035, -0.21014924, -0.11247329, -0.17755648, -0.05139757, -0.037780646));
	target2 += mul(h1, float4x4(0.12605286, 0.16123274, -0.13924524, -0.109194726, 0.033486, -0.24847955, 0.1264379, 0.28880134, -0.17594175, -0.1888256, -0.04508948, 0.047563452, -0.5476752, -0.23573762, -0.17183748, 0.14331517));
	target2 += mul(i1, float4x4(-0.006482806, 0.2289281, -0.03872587, -0.027272481, -0.09913351, -0.09453464, -0.1426349, 0.055076513, -0.025217436, -0.08307176, 0.0797406, 0.10166401, -0.294337, -0.3567936, 0.054015454, 0.068333104));
	target2 += mul(a2, float4x4(0.012300659, -0.040405195, 0.11190478, -0.07406065, -0.18364848, 0.035823543, -0.01621734, 0.07582391, 0.06704436, -0.0006620425, -0.022342965, 0.16496183, 0.11390146, 0.075079784, 0.13547076, -0.022227254));
	target2 += mul(b2, float4x4(0.23038611, -0.29141426, 0.0984085, -0.20544642, -0.18859404, 0.3620387, -0.4136066, 0.32138887, -0.0047645094, 0.11271573, 0.15377328, 0.012071895, -0.029830804, 0.14384824, 0.04148142, 0.2286753));
	target2 += mul(c2, float4x4(-0.120368056, -0.0026308578, -0.027536837, -0.13022487, 0.19286355, 0.30597997, -0.121778116, 0.29960433, -0.06231281, -0.013746478, 0.10620681, -0.02362372, -0.10042793, 0.015861828, -0.06073457, 0.11589962));
	target2 += mul(d2, float4x4(0.1148781, -0.24268909, 0.24827103, -0.17290637, -0.14397098, -0.16708367, 0.2130187, -0.18639165, -0.13702524, 0.107212365, 0.066469796, -0.14059094, 0.19621798, -0.036907773, -0.028576817, 0.19191594));
	target2 += mul(e2, float4x4(0.061653305, -0.12716687, 0.17514701, 0.003910376, -0.00651784, 0.25642744, -0.17615528, -0.03584991, -0.051342323, -0.20178711, -0.4330863, 0.15785883, -0.14388351, 0.050646614, 0.15746376, -0.17228809));
	target2 += mul(f2, float4x4(-0.32631296, -0.020115409, -0.16132942, 0.29139966, -0.18642388, -0.15140165, 0.2106485, -0.025535548, 0.08296747, 0.037819803, 0.106129125, -0.095521644, 0.312119, -0.09383011, -0.023469942, -0.035990953));
	target2 += mul(g2, float4x4(0.012878467, -0.1599543, 0.14487906, -0.083350256, 0.074949436, -0.09346481, 0.10122695, 0.08852621, 0.11138647, -0.0072039254, -0.00842464, 0.030785646, -0.04394235, 0.10987614, 0.15378197, -0.05989409));
	target2 += mul(h2, float4x4(0.41359067, -0.04985946, 0.06845964, 0.12003392, 0.0803128, 0.2420856, -0.18877462, 0.058456603, -0.02516271, 0.010639022, -0.04928307, -0.023084244, 0.06001203, 0.06881964, -0.12117699, -0.2680374));
	target2 += mul(i2, float4x4(0.09667388, 0.16247103, 0.105098106, 0.12871382, 0.063410334, 0.029997706, 0.048323907, -0.075631075, 0.034694012, -0.029085271, -0.003785678, -0.05397498, -0.1783155, -0.13680255, 0.024786513, -0.0041952017));
	target2 += mul(na1, float4x4(-0.23904142, -0.102619216, -0.21049559, -0.07428196, -0.046321787, -0.09432119, 0.08803711, -0.1660408, 0.31880215, 0.11605265, -0.086603194, 0.119239025, 0.06773056, 0.18591799, 0.0058458247, 0.05242187));
	target2 += mul(nb1, float4x4(0.12521484, -0.23739336, -0.16784379, -0.10277679, -0.18505791, 0.061825443, 0.12762548, -0.16664176, 0.20004764, -0.1400315, 0.35610282, -0.19706382, 0.046386316, -0.155162, -0.0425219, 0.0010560523));
	target2 += mul(nc1, float4x4(0.14500342, -0.0046809237, -0.1278097, 0.041527335, 0.11831141, -0.059155047, -0.17391829, 0.0059517594, -0.18033625, -0.379706, 0.11636179, -0.13310274, 0.047523372, 0.0029333998, -0.1512301, 0.1361489));
	target2 += mul(nd1, float4x4(-0.23058943, -0.08937329, 0.07061336, 0.08555644, 0.09255573, -0.15303029, 0.08891002, -0.42177418, 0.0950346, 0.20212616, 0.3866544, 0.07922501, -0.04093803, -0.10997976, -0.07189613, -0.21220057));
	target2 += mul(ne1, float4x4(-0.04484278, 0.2386453, 0.27855012, 0.011022442, 0.0409283, 0.1937425, 0.060258046, 0.2633126, -0.54181176, 0.19643608, -0.28907844, 0.04247623, -0.37548354, -0.24831985, -0.52362055, -0.4442409));
	target2 += mul(nf1, float4x4(0.014318134, 0.047169194, -0.07291308, 0.21408482, -0.01503884, 0.027093383, -0.11724912, -0.052458502, 0.1676504, 0.5505249, 0.22394833, -0.17126445, 0.13671164, -0.18371153, -0.456313, 0.14297491));
	target2 += mul(ng1, float4x4(0.00063476624, 0.16339731, -0.031160444, 0.18237135, 0.025692228, -0.04895109, 0.033651803, -0.002480504, 0.34582126, -0.039352335, -0.004698449, 0.12789944, -0.08318657, -0.007492543, -0.12888806, 0.03684109));
	target2 += mul(nh1, float4x4(-0.06481498, 0.14330916, 0.17366715, -0.028045174, 0.080571376, 0.18343642, -0.11593154, -0.077227145, 0.1973531, 0.3085006, -0.28876102, 0.06434657, 0.16654246, -0.28144804, 0.3234261, -0.026636604));
	target2 += mul(ni1, float4x4(-0.084783904, 0.03651458, 0.020044886, -0.10723048, 0.04165204, 0.04072967, 0.037039082, -0.09042298, 0.19693066, -0.21291414, -0.040890995, -0.15434273, -0.07450638, 0.27289733, 0.06332989, -0.037289053));
	target2 += mul(na2, float4x4(-0.004840926, 0.048929166, 0.015578959, 0.03571025, -0.2184971, 0.094020076, -0.17748803, 0.32877877, -0.035392962, -0.28398407, -0.13072185, -0.21858144, -0.24103665, -0.32654533, -0.063572675, -0.008728733));
	target2 += mul(nb2, float4x4(0.0060240547, 0.029166108, -0.023887299, 0.037508924, 0.04231956, 0.1503379, 0.17414866, -0.25778973, -0.14774446, -0.12541369, -0.32502824, 0.28957245, -0.030400498, 0.05351274, 0.13189505, -0.21329227));
	target2 += mul(nc2, float4x4(0.2198507, -0.49962172, -0.16456802, 0.08402717, -0.094403476, -0.1978019, -0.19233316, 0.055013265, 0.01668743, -0.117106654, -0.0745593, -0.09377295, 0.050370943, 0.07410238, 0.13543247, -0.23753798));
	target2 += mul(nd2, float4x4(0.008572295, 0.11890422, -0.047157902, -0.03717175, -0.35570037, 0.060663674, 0.109250925, -0.16135052, 0.030490266, 0.30335435, 0.38949126, 0.44852075, -0.09788441, 0.43574813, -0.30050707, 0.24572986));
	target2 += mul(ne2, float4x4(0.29497403, -0.30934516, 0.05756695, -0.15919119, -0.121505864, -0.028917443, -0.07419939, 0.13863774, -0.04398897, 0.32990414, 0.38306457, -0.030523712, 0.72267497, 0.33932966, 0.07839862, 0.11931982));
	target2 += mul(nf2, float4x4(0.26952964, -0.31019664, 0.07061176, -0.23266664, 0.14124376, 0.3597343, -0.17694736, 0.22935267, -0.12335108, -0.086614646, -0.10635, 0.22585274, -0.27139255, 0.05963002, 0.2852169, -0.3743854));
	target2 += mul(ng2, float4x4(0.0970178, -0.014084432, -0.0504985, 0.1570353, 0.091999866, 0.23429315, 0.12914294, 0.03267318, 0.5849793, 0.38205758, -0.31792474, -0.07992281, 0.022620765, 0.22215942, -0.23093775, 0.0026896205));
	target2 += mul(nh2, float4x4(-0.06753083, -0.20358866, 0.173053, 0.13768815, 0.013206715, 0.06310567, 0.17349118, -0.12714109, 0.0405548, -0.18409975, 0.3441249, -0.24606577, -0.18814458, -0.039655812, -0.15961805, 0.08212082));
	target2 += mul(ni2, float4x4(0.06746224, -0.1595078, 0.15284725, -0.057313897, -0.1229526, 0.11482664, -0.0021675595, -0.00026835455, -0.0653958, -0.0967453, -0.09400396, -0.021233113, 0.23587836, 0.2982212, -0.039116163, 0.012201323));
	target2 += float4(0.049680557, 0.01432493, 0.04349397, 0.040003702);

	tex3[gxy] = target1;
	tex4[gxy] = target2;
}


//!PASS 3
//!DESC Conv-4x3x3x16
//!IN tex3, tex4
//!OUT tex1, tex2
//!BLOCK_SIZE 8
//!NUM_THREADS 64

void Pass3(uint2 blockStart, uint3 threadId) {
	uint2 gxy = Rmp8x8(threadId.x) + blockStart;
	uint2 inputSize = GetInputSize();
	if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) {
		return;
	}

	float2 inputPt = GetInputPt();
	float2 pos = (gxy + 0.5f) * inputPt;

	// [ a, d, g ]
	// [ b, e, h ]
	// [ c, f, i ]
	float4 a1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	float4 b1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	float4 c1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	float4 d1 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	float4 e1 = tex3.SampleLevel(sam, pos, 0);
	float4 f1 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	float4 g1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	float4 h1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	float4 i1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	float4 na1 = max(-a1, 0);
	float4 nb1 = max(-b1, 0);
	float4 nc1 = max(-c1, 0);
	float4 nd1 = max(-d1, 0);
	float4 ne1 = max(-e1, 0);
	float4 nf1 = max(-f1, 0);
	float4 ng1 = max(-g1, 0);
	float4 nh1 = max(-h1, 0);
	float4 ni1 = max(-i1, 0);

	a1 = max(a1, 0);
	b1 = max(b1, 0);
	c1 = max(c1, 0);
	d1 = max(d1, 0);
	e1 = max(e1, 0);
	f1 = max(f1, 0);
	g1 = max(g1, 0);
	h1 = max(h1, 0);
	i1 = max(i1, 0);

	float4 a2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	float4 b2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	float4 c2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	float4 d2 = tex4.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	float4 e2 = tex4.SampleLevel(sam, pos, 0);
	float4 f2 = tex4.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	float4 g2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	float4 h2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	float4 i2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	float4 na2 = max(-a2, 0);
	float4 nb2 = max(-b2, 0);
	float4 nc2 = max(-c2, 0);
	float4 nd2 = max(-d2, 0);
	float4 ne2 = max(-e2, 0);
	float4 nf2 = max(-f2, 0);
	float4 ng2 = max(-g2, 0);
	float4 nh2 = max(-h2, 0);
	float4 ni2 = max(-i2, 0);

	a2 = max(a2, 0);
	b2 = max(b2, 0);
	c2 = max(c2, 0);
	d2 = max(d2, 0);
	e2 = max(e2, 0);
	f2 = max(f2, 0);
	g2 = max(g2, 0);
	h2 = max(h2, 0);
	i2 = max(i2, 0);

	float4 target1 = mul(a1 , float4x4(-0.07314084, 0.08021976, -0.08299374, -0.21340942, -0.0088407695, 0.04742526, -0.038566757, -0.058931205, 0.0009213959, 0.19193986, -0.05906689, -0.0038934543, -0.12937409, 0.100754194, 0.1683601, 0.07552924));
	target1 += mul(b1 , float4x4(-0.022257961, 0.08347593, -0.02279838, 0.10150892, -0.02083181, 0.07064587, 0.26308942, -0.13609628, 0.023648601, 0.1475858, 0.12856342, 0.2650287, -0.038316045, -0.35173503, 0.09157486, 0.16609442));
	target1 += mul(c1 , float4x4(-0.13746555, 0.15315081, -0.032931942, 0.07487079, 0.09694968, 0.014459765, 0.06814075, -0.059461202, 0.25045857, -0.0071333316, 0.067206055, -0.21697883, 0.023228496, -0.13146883, 0.07486156, -0.030696157));
	target1 += mul(d1 , float4x4(-0.0069204876, -0.18402638, 0.085326575, 0.18288516, 0.036785558, -0.019116882, 0.017438713, 0.029095992, 0.10944869, -0.09473364, 0.10444152, -0.028845368, 0.0909169, -0.10593229, 0.14518781, 0.05546837));
	target1 += mul(e1 , float4x4(0.53389466, -0.018921841, -0.05050542, 0.21149407, 0.3041209, -0.2594824, -0.18464427, 0.20736529, 0.18971719, -0.05058395, -0.13514072, -0.009045264, 0.20910244, 0.29242986, 0.28958234, 0.2870443));
	target1 += mul(f1 , float4x4(0.03259606, 0.2126493, 0.6004735, 0.14007168, -0.1424266, 0.04352873, 0.17071731, 0.10630275, -0.2755667, 0.27345222, -0.06420644, 0.032743722, 0.026045147, -0.23541754, 0.01393772, -0.1476582));
	target1 += mul(g1 , float4x4(0.06258474, -0.040185593, -0.092409454, -0.095720276, 0.050550956, -0.026547447, 0.099580996, 0.04878719, 0.15659782, -0.007606541, -0.061156776, 0.11329769, -0.019249229, 0.028775204, -0.24508974, -0.052828208));
	target1 += mul(h1 , float4x4(-0.16975857, -0.008542089, 0.30186546, 0.33199415, 0.03747256, 0.15057808, 0.017838268, -0.030345246, 0.019341556, 0.3217693, 0.24844399, 0.06951953, -0.10805396, -0.08874898, -0.068681985, -0.2677526));
	target1 += mul(i1 , float4x4(-0.06813968, 0.087481484, -0.11338694, -0.08698839, -0.07585716, 0.079565816, -0.066336565, 0.050449606, 0.11338618, 0.38572344, 0.0024759274, 0.12706435, 0.16759671, 0.0254419, -0.06910047, -0.21917519));
	target1 += mul(a2 , float4x4(0.0039553675, -0.17838223, 0.038052835, 0.027201787, 0.06518285, 0.08250212, -0.052679926, -0.021249574, -0.13604519, 0.12234797, -0.16008313, -0.07422232, -0.0930264, -0.07480355, -0.0067053377, 0.13964424));
	target1 += mul(b2 , float4x4(-0.05491681, 0.16191071, -0.13063031, -0.2889149, -0.045188528, 0.29249623, -0.061093148, -0.083284624, -0.19250835, -0.103631295, -0.23577131, 0.108691126, 0.028907659, -0.2708106, 0.06986715, 0.22996326));
	target1 += mul(c2 , float4x4(-0.07838976, -0.063634194, 0.06297176, -0.09969828, 0.10518915, 0.062185638, 0.033053298, 0.023406805, -0.2801067, -0.13414349, -0.02466297, -0.1110011, 0.040580552, 0.033576507, 0.07127022, -0.068416506));
	target1 += mul(d2 , float4x4(-0.05786512, 0.17169164, -0.09276801, -0.1444394, 0.13971466, -0.168134, 0.012722911, 0.06788442, 0.02493809, 0.04105174, 0.09471395, 0.21363391, -0.12093948, 0.067423604, -0.054669242, 0.06764739));
	target1 += mul(e2 , float4x4(0.2954526, 0.15885043, -0.05164922, 0.3646313, 0.013329013, 0.044056762, 0.01717495, -0.030439444, 0.32433322, -0.29044852, 0.32627285, 0.150364, 0.14502852, -0.22193567, -0.18879528, 0.018430077));
	target1 += mul(f2 , float4x4(-0.2973998, -0.41863972, 0.0048396075, 0.06709588, -0.12029818, -0.05315725, -0.11457002, 0.0071458486, 0.26290894, 0.11030596, 0.082195595, -0.27480638, -0.011602335, 0.019122265, -0.18927693, -0.24246486));
	target1 += mul(g2 , float4x4(0.09974451, 0.07223917, -0.09586719, -0.08288307, -0.06436462, -0.027324842, -0.0019976476, 0.19203754, 0.015929956, -0.12534836, -0.0038582094, 0.11275662, -0.031039666, 0.010430081, -0.023713758, -0.21801127));
	target1 += mul(h2 , float4x4(0.054167796, 0.0634282, -0.047591783, -0.06402415, -0.0709014, 0.082054086, 0.28418478, 0.06584792, -0.18744822, -0.006312915, -0.0075474046, 0.0829434, -0.032414634, 0.19225785, -0.082302466, -0.3142319));
	target1 += mul(i2 , float4x4(-0.0026932533, -0.110426664, 0.021643564, -0.14368293, -0.0048789545, 0.11043582, -0.040021945, 0.058764413, -0.009000321, 0.10833911, 0.05681704, -0.039960742, 0.0014395626, 0.022780152, -0.09172437, -0.085687816));
	target1 += mul(na1 , float4x4(0.12509525, -0.18352552, -0.07638094, -0.00756009, 0.05407378, -0.14584734, -0.08163636, -0.13222884, 0.039648265, -0.15960212, 0.074228585, 0.009451507, 0.17933762, -0.17743796, 0.007834195, 0.0037116117));
	target1 += mul(nb1 , float4x4(-0.10942205, 0.1585392, 0.040241007, 0.10526164, 0.16979292, 0.29029292, -0.009487742, 0.24926443, -0.1047842, 0.03604099, 0.19281772, 0.03798268, 0.17581491, 0.25031644, 0.055782937, -0.30455682));
	target1 += mul(nc1 , float4x4(0.06714908, -0.09112766, -0.022286715, 0.09795178, -0.014092309, 0.26703134, 0.15334776, 0.33441234, 0.13753732, -0.13819148, 0.22796239, 0.16050872, 0.05523446, 0.082806356, -0.053028688, -0.0400533));
	target1 += mul(nd1 , float4x4(-0.028462043, 0.18224953, 0.026658487, -0.15048791, 0.106156826, -0.07361365, 0.3529029, 0.06473894, -0.032005392, 0.037034214, 0.039220046, -0.012491292, -0.09503139, 0.0444902, -0.31978187, -0.2923563));
	target1 += mul(ne1 , float4x4(-0.3674723, 0.22560489, 0.38837367, 0.17128418, -0.0948159, 0.6298207, 0.59135467, 0.3350841, -0.1859739, 0.31080073, 0.03317792, 0.20958795, -0.097624235, -0.07605166, 0.10135128, -0.08953993));
	target1 += mul(nf1 , float4x4(0.320043, 0.002823138, -0.08849585, -0.06356955, 0.19898786, 0.272037, 0.1241285, 0.18131523, -0.05760319, -0.19315276, -0.033923294, 0.09981398, -0.07670874, -0.25949827, 0.062826484, 0.011877337));
	target1 += mul(ng1 , float4x4(-0.019341033, -0.03938962, 0.10163529, 0.05033707, -0.03194324, -0.13427012, 0.16106506, -0.05596736, -0.04438277, 0.0045224032, 0.20575951, -0.10359912, 0.03423479, -0.17256664, 0.32534334, -0.09378658));
	target1 += mul(nh1 , float4x4(0.19792143, 0.038506437, -0.21047395, -0.27926794, 0.23113485, -0.053830303, 0.4963027, 0.34639266, 0.108149074, -0.10592886, 0.09575202, 0.12385147, 0.08751849, -0.050622147, 0.033647005, 0.2588364));
	target1 += mul(ni1 , float4x4(0.04931599, -0.14498134, 0.0073008477, -0.05298649, 0.29398152, 0.16829367, 0.089691155, -0.01749789, 0.20039341, -0.13137043, 0.1884996, -0.03018221, -0.06793498, -0.03220071, 0.06326444, 0.017898731));
	target1 += mul(na2 , float4x4(0.011310341, 0.15556115, -0.08003895, -0.07396207, -0.06434896, -0.14684777, -0.019239893, 0.009520887, 0.013242985, -0.12733786, -0.040152796, 0.0064262203, 0.087119006, 0.08165867, 0.12353576, 0.002600503));
	target1 += mul(nb2 , float4x4(0.14877501, -0.056240283, -0.11846124, 0.16736585, -0.0018247389, 0.0095979795, -0.07605829, 0.13583913, -0.008851887, 0.16578445, -0.04152669, -0.059164364, -0.021962654, 0.312347, 0.0129089225, -0.097307086));
	target1 += mul(nc2 , float4x4(-0.122485265, 0.06891502, -0.1807204, 0.10579281, -0.0061903363, -0.025644284, 0.08879091, -0.09492319, -0.019361734, -0.10903786, -0.08949264, 0.055067465, -0.027095577, -0.06629012, -0.05580654, 0.045552503));
	target1 += mul(nd2 , float4x4(-0.025895944, 0.18728323, 0.09764548, 0.49504116, -0.030123139, -0.012580951, 0.090377375, -0.18767111, -0.06874367, 0.11378584, 0.0127285635, -0.101479106, 0.07010412, -0.02272616, -0.03455195, 0.040611476));
	target1 += mul(ne2 , float4x4(-0.58637494, -0.13186562, -0.26627728, -0.40135092, 0.19139144, 0.27310577, 0.07761293, 0.10058002, -0.3126869, -0.07982417, 0.04237517, 0.25126198, -0.17133251, 0.122523, -0.0053142905, -0.22283912));
	target1 += mul(nf2 , float4x4(-0.0023953887, 0.30968156, -0.1303385, 0.046937056, 0.20530851, 0.07276076, -0.086923674, -0.17881633, 0.08715105, 0.25641996, -0.22557895, -0.0017721896, -0.2347971, -0.07164777, -0.103000194, 0.22468017));
	target1 += mul(ng2 , float4x4(-0.12947787, -0.05199853, -0.0899567, 0.087013826, 0.018399805, 0.14997742, -0.20396905, -0.20554177, -0.014265392, 0.048660364, 0.07077151, -0.05911514, 0.003051989, 0.07242704, -0.16232954, 0.19634365));
	target1 += mul(nh2 , float4x4(0.13121666, 0.03174777, 0.07853035, -0.04881682, 0.10043158, -0.036237933, -0.2178651, -0.06562213, 0.021113047, 0.0068006255, -0.16305129, -1.9600706e-05, -0.14886445, -0.17729987, -0.17907865, 0.21547341));
	target1 += mul(ni2 , float4x4(-0.03263096, -0.064234234, 0.03990361, 0.09057224, -0.05704657, -0.107518636, 0.09328312, 0.014857798, -0.060736485, -0.033695858, -0.07943859, -0.0054049506, -0.072932534, -0.023306495, -0.06615389, 0.029145932));
	target1 += float4(0.0052180276, 0.022526434, 0.022657124, 0.016289035);

	float4 target2 = mul(a1 , float4x4(0.012031344, 0.0075636036, -0.033211436, 0.018453801, -0.23412584, -0.113123864, 0.068607934, -0.018517016, -0.19748597, -0.2571716, -0.026148321, -0.00019766031, 0.012040108, 0.12122093, 0.0714374, -0.10087335));
	target2 += mul(b1 , float4x4(-0.029292978, -0.025254043, -0.034099232, 0.085234866, 0.24252516, 0.076297395, -0.12717746, -0.03457669, 0.033755753, -0.0531509, -0.04005856, -0.20840853, -0.0078028045, 0.12575904, -0.010887013, -0.046326064));
	target2 += mul(c1 , float4x4(-0.003266499, -0.017687857, -0.012221699, -0.2251586, 0.00208294, 0.007880196, 0.09037794, 0.08328994, -0.0428717, 0.027112724, 0.08032711, 0.1513152, -0.043068174, 0.07987632, -0.008801098, 0.08133886));
	target2 += mul(d1 , float4x4(-0.1827595, 0.18459928, -0.1918044, -0.05324067, -0.1705114, -0.01887987, -0.14486305, -0.17456877, -0.18964832, -0.07162095, -0.13871318, -0.046433818, -0.018604748, -0.11131921, -0.08050445, -0.08619502));
	target2 += mul(e1 , float4x4(-0.0717377, -0.12163745, 0.18497953, -0.08643892, 0.0007879318, -0.050351888, 0.17640385, 0.17240365, -0.14958718, -0.056793597, 0.03742872, -0.1015922, 0.3117469, -0.39953762, 0.0152286505, -0.13784732));
	target2 += mul(f1 , float4x4(0.07879097, -0.39204946, -0.07003556, -0.24708183, -0.058046583, -0.09865189, -0.048411854, -0.05027539, -0.12736927, -0.23946127, -0.08323304, 0.028160958, -0.059784077, -0.0064917994, 0.038013496, 0.08928725));
	target2 += mul(g1 , float4x4(0.07403741, -0.004601062, 0.13563065, 0.054981887, -0.08022936, 0.022921488, -0.053264186, -0.016605966, -0.20883927, -0.19978985, -0.058101434, 0.15126002, 0.020758694, 0.12837122, 0.13368484, 0.1443778));
	target2 += mul(h1 , float4x4(-0.08701922, -0.041025855, -0.03362371, -0.19846733, -0.009003309, 0.06708822, 0.06784735, 0.049892817, 0.123487085, -0.008921262, -0.0883188, -0.09103165, 0.070733, 0.1474191, -0.08228257, 0.12713781));
	target2 += mul(i1 , float4x4(0.16015989, 0.19007389, -0.12680867, 0.056614764, -0.008470681, 0.099433914, 0.008811413, -0.09471121, -0.09722353, 0.0649324, 0.021527816, -0.21614286, 0.07569941, -0.16433574, -0.0069269636, 0.16142729));
	target2 += mul(a2 , float4x4(-0.08708631, -0.017263759, 0.034016605, -0.009168008, -0.16427393, -0.11225274, -0.005249783, 0.13672975, -0.0844234, -0.022700429, 0.109927036, -0.041033685, -0.064794436, 0.015655773, -0.03411672, -0.12218549));
	target2 += mul(b2 , float4x4(-0.016761513, -0.027447775, -0.01290059, 0.0007822344, 0.07433617, -0.035145793, -0.03797909, -0.16871531, -0.029095095, -0.2073536, 0.12309633, -0.16626619, -0.04203133, -0.018517911, -0.06946039, -0.11132114));
	target2 += mul(c2 , float4x4(0.11052091, -0.030863507, -0.03229482, 0.11673996, -0.0455341, -0.00649463, 0.020642368, 0.04092308, 0.20173405, -0.012926573, -0.0244531, 0.055338163, -0.01835753, 0.024072325, -0.06893433, 0.048774183));
	target2 += mul(d2 , float4x4(0.3568486, -0.14506009, -0.13730963, -0.027905643, -0.37042627, -0.016187102, 0.12948507, 0.016912838, -0.089135066, -0.15287507, -0.092210636, 0.043153215, 0.2077129, 0.04429632, -0.107345045, -0.015176141));
	target2 += mul(e2 , float4x4(-0.33605802, -0.22235338, 0.1270437, -0.23185425, 0.29133183, -0.005394921, -0.07139614, -0.049961478, 0.017125877, 0.499106, -0.0048643304, -0.14794266, -0.06752325, 0.29848218, 0.11979753, 0.033426132));
	target2 += mul(f2 , float4x4(0.11241839, -0.09014392, -0.011629057, 0.17028853, -0.100855775, 0.100789815, -0.05269513, 0.06573697, 0.27869916, -0.057539526, -0.04528007, 0.30135208, -0.02261679, 0.0688468, 0.059139624, 0.13873443));
	target2 += mul(g2 , float4x4(0.04780322, -0.008265764, -0.014270074, 0.0834061, 0.055182222, -0.059819162, 0.010733226, -0.040952608, -0.14509161, 0.17645077, 0.05801798, -0.042507146, 0.24863482, 0.1040497, -0.045867782, 0.120007925));
	target2 += mul(h2 , float4x4(0.12579694, 0.09167574, 0.21078496, 0.052945495, -0.05036728, -0.11384816, -0.07594621, -0.09991826, 0.010668207, -0.05676672, -0.06273805, -0.06883917, -0.2184931, -0.1647689, -0.056467786, 0.109850615));
	target2 += mul(i2 , float4x4(-0.11352159, 0.026516005, 0.042277884, 0.14155892, -0.017015357, -0.03407179, 0.014961351, -0.13766216, 0.20035928, -0.038310144, 0.002857473, -0.04447413, 0.011375937, -0.07345281, 0.01680756, 0.0089689195));
	target2 += mul(na1 , float4x4(0.18048844, 0.025165293, -0.013590799, 0.21590467, 0.026852742, -0.06107904, -0.0012434963, 0.047840245, -0.07294931, -0.011157553, 0.11376999, -0.0086454, -0.028179385, -0.11118097, -0.15483098, 0.19983171));
	target2 += mul(nb1 , float4x4(-0.15175144, 0.2142459, 0.1478812, -0.14039889, -0.19821295, -0.37290373, 0.19691283, 0.115997985, 0.1284214, 0.19273835, -0.096292645, -0.022643294, 0.15401742, -0.2267051, -0.15150996, 0.099672556));
	target2 += mul(nc1 , float4x4(-0.068340585, -0.017279925, 0.04846922, -0.034003776, 0.055793036, -0.25135002, -0.03544407, -0.56164503, -0.19032021, -0.009258663, 0.070812754, -0.08191077, 0.047685042, -0.020684654, -0.07035788, 0.0132855335));
	target2 += mul(nd1 , float4x4(0.19441503, -0.15030424, 0.12302495, 0.047762766, -0.095896654, -0.15033515, 0.007605368, 0.0570889, -0.038431447, -0.08560695, -0.0029293734, -0.01375586, 0.047505997, 0.014071177, 0.1479392, 0.25642776));
	target2 += mul(ne1 , float4x4(-0.28587586, -0.39141047, -0.3444917, -0.2408476, -0.64026415, -0.35294148, -0.1317, -0.21601357, 0.12164572, -0.48452628, 0.16729403, -0.21575572, 0.41301385, 0.017696327, 0.057344552, -0.27020162));
	target2 += mul(nf1 , float4x4(-0.033119988, 0.0012006643, 0.08465847, 0.015564506, -0.124659166, -0.09455984, 0.0035544615, -0.35156307, -0.15252608, 0.016244112, 0.0138391815, -0.04670501, 0.1383293, -0.037926193, 0.025957817, 0.1730784));
	target2 += mul(ng1 , float4x4(-0.012701927, -0.025511298, -0.06721094, -0.07040279, 0.06377799, 0.13967788, -0.14234799, -0.058825023, 0.041205924, -0.00032473358, -0.055379577, -0.033738375, 0.13665317, -0.02562686, -0.18523781, -0.06958092));
	target2 += mul(nh1 , float4x4(0.17461562, 0.07647785, -0.02202248, 0.21096313, -0.22494456, 0.10868611, -0.33091828, -0.27529812, -0.25206757, 0.1884099, -0.17850949, -0.1006927, 0.045536183, -0.100012675, 0.061030168, -0.025509179));
	target2 += mul(ni1 , float4x4(0.0337314, -0.052486207, -0.05584458, 0.0969859, 0.18508333, -0.04521821, -0.08331424, 0.076726556, 0.118076116, 0.019730117, 0.022492286, 0.09869008, -0.115276754, 0.097966135, 0.023186501, -0.060849246));
	target2 += mul(na2 , float4x4(-0.09427026, 0.14057149, -0.07478311, 0.029171692, 0.14987083, -0.08649628, -0.01750609, 0.06958318, 0.085471064, -0.058146793, -0.029388946, 0.10720532, -0.030614216, 0.17328379, -0.03433174, -0.022483094));
	target2 += mul(nb2 , float4x4(-0.085193954, -0.1348099, 0.07675298, -0.25627816, -0.07467235, -0.18559869, 0.100543626, -0.2201029, -0.015106581, -0.013150452, 0.10482805, -0.04446529, -0.15954255, 0.13659625, -0.10310867, -0.010787774));
	target2 += mul(nc2 , float4x4(-0.13365999, 0.02036792, -0.09569852, -0.088586286, 0.18445042, -0.14354594, -0.09319419, 0.084703825, -0.018052364, 0.04344066, -0.0589665, -0.0065992875, 0.030960705, 0.08472253, -0.022175593, -0.020301547));
	target2 += mul(nd2 , float4x4(-0.12315616, 0.05191162, 0.3044562, -0.066225395, 0.13523789, 0.24786936, -0.2531183, 0.008910162, 0.3662465, 0.2633546, -0.11816884, -0.108501054, -0.30446148, 0.094746254, 0.22515038, -0.048324294));
	target2 += mul(ne2 , float4x4(0.34875512, 0.22885701, -0.22425419, 0.30605644, 0.13452671, 0.16655035, -0.10293953, 0.23753232, -0.5908745, -0.15148452, -0.3885865, 0.14085245, -0.12627047, -0.09645269, 0.101941, -0.062304396));
	target2 += mul(nf2 , float4x4(-0.18468879, 0.11713357, 0.04766135, -0.25752118, 0.076471716, 0.06850848, -0.06427401, 0.028061042, 0.017875634, 0.09589284, -0.020327348, -0.1585817, 0.19669123, 0.10955879, -0.18545902, -0.074755065));
	target2 += mul(ng2 , float4x4(0.1056897, 0.08521911, -0.017700022, -0.004319419, 0.15351436, -0.11358399, 0.065656774, 0.101860404, 0.08894655, -0.060075074, 0.14363492, -0.10447328, -0.27426496, -0.19959188, 0.16687778, -0.09456175));
	target2 += mul(nh2 , float4x4(-0.05424188, -0.16305181, 0.028440254, -0.013702167, -0.010122417, -0.13160124, 0.08733208, 0.111403994, -0.13586052, 0.016545279, 0.12953275, -0.01298413, 0.19755821, 0.029597677, 0.004327247, 0.093656704));
	target2 += mul(ni2 , float4x4(-0.016224308, -0.020333769, 0.015944391, -0.044774864, 0.09308092, -0.06174809, 0.009493231, 0.00109714, 0.030341865, 0.0085925255, 0.023199126, 0.029012285, 0.050746094, 0.15161276, 0.053011492, -0.022610705));
	target2 += float4(-0.034925383, -0.0010656221, -0.023427188, -0.021127155);

	tex1[gxy] = target1;
	tex2[gxy] = target2;
}


//!PASS 4
//!DESC Conv-4x3x3x16, Depth-to-Space
//!IN INPUT, tex1, tex2
//!OUT OUTPUT
//!BLOCK_SIZE 16
//!NUM_THREADS 64

void Pass4(uint2 blockStart, uint3 threadId) {
	uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart;
	
	const uint2 outputSize = GetOutputSize();
	if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) {
		return;
	}

	float2 inputPt = GetInputPt();
	float2 pos = ((gxy >> 1) + 0.5f) * inputPt;

	// [ a, d, g ]
	// [ b, e, h ]
	// [ c, f, i ]
	float4 a1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	float4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	float4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	float4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	float4 e1 = tex1.SampleLevel(sam, pos, 0);
	float4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	float4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	float4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	float4 i1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	float4 na1 = max(-a1, 0);
	float4 nb1 = max(-b1, 0);
	float4 nc1 = max(-c1, 0);
	float4 nd1 = max(-d1, 0);
	float4 ne1 = max(-e1, 0);
	float4 nf1 = max(-f1, 0);
	float4 ng1 = max(-g1, 0);
	float4 nh1 = max(-h1, 0);
	float4 ni1 = max(-i1, 0);

	a1 = max(a1, 0);
	b1 = max(b1, 0);
	c1 = max(c1, 0);
	d1 = max(d1, 0);
	e1 = max(e1, 0);
	f1 = max(f1, 0);
	g1 = max(g1, 0);
	h1 = max(h1, 0);
	i1 = max(i1, 0);

	float4 a2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	float4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	float4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	float4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	float4 e2 = tex2.SampleLevel(sam, pos, 0);
	float4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	float4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	float4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	float4 i2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	float4 na2 = max(-a2, 0);
	float4 nb2 = max(-b2, 0);
	float4 nc2 = max(-c2, 0);
	float4 nd2 = max(-d2, 0);
	float4 ne2 = max(-e2, 0);
	float4 nf2 = max(-f2, 0);
	float4 ng2 = max(-g2, 0);
	float4 nh2 = max(-h2, 0);
	float4 ni2 = max(-i2, 0);

	a2 = max(a2, 0);
	b2 = max(b2, 0);
	c2 = max(c2, 0);
	d2 = max(d2, 0);
	e2 = max(e2, 0);
	f2 = max(f2, 0);
	g2 = max(g2, 0);
	h2 = max(h2, 0);
	i2 = max(i2, 0);

	float4 target1 = mul(a1, float4x4(0.009722335, -5.8660436e-05, -0.0069387504, -0.0052446183, -0.040276118, 0.0041334885, -0.013106614, -0.0047898176, -0.008160448, 0.011272557, -0.008908942, -0.015969492, 0.036588583, -0.0069453213, 0.03697349, 0.024233166));
	target1 += mul(b1, float4x4(0.07749142, -0.0112727145, 0.064222045, -0.015094554, 0.0032031287, 0.03247034, -0.016756386, 0.023846423, -0.028618578, 0.02300731, -0.015894018, 0.037608027, 0.014199439, -0.043177396, -0.004832348, -0.05518754));
	target1 += mul(c1, float4x4(0.008171211, -0.016406616, 0.04668373, -0.0020393482, -0.008888379, 0.001380358, -0.008963435, 0.0012900458, -0.030172894, -0.0017824832, -0.037534058, 0.000615256, 0.030373376, 0.002216906, 0.04730168, -0.0028000386));
	target1 += mul(d1, float4x4(0.060749017, 0.006499037, -0.03925888, -0.043421242, 0.0014141012, -0.040274277, 0.020051334, 0.02141008, -0.0046555796, -0.032477897, 0.02811765, 0.014327698, 0.008681297, 0.044408746, -0.028984996, 0.00985357));
	target1 += mul(e1, float4x4(0.22245905, 0.2221309, 0.21369153, 0.17244695, -0.16802068, -0.09160697, -0.13712268, -0.104401335, -0.18699472, -0.117237985, -0.13240008, -0.121350996, 0.027870163, 0.09320937, 0.07950856, 0.08880132));
	target1 += mul(f1, float4x4(-0.002709059, -0.0070304363, 0.10570918, 0.08184527, -0.014383472, -0.020202143, -0.0810668, -0.054163996, -0.018711304, -0.035145987, -0.098869935, -0.06942387, -0.011235106, 0.008683168, -0.02585752, 0.024761796));
	target1 += mul(g1, float4x4(-0.017611317, 0.033189557, 0.0014886355, 0.0063918163, 0.0033280635, 0.00871624, 0.018652624, 0.0072240643, 0.028240945, 0.027274653, -0.0044101775, 0.012503479, -0.009022953, -0.0037992215, 0.007457012, -0.0075594983));
	target1 += mul(h1, float4x4(-0.042642962, 0.061122447, -0.0661494, 0.046923082, 0.014721836, -0.07878182, 0.013244828, -0.047850955, 0.016932828, -0.07947459, 0.05953852, -0.007192553, -0.022235982, -0.026965706, -0.034282424, -0.007242096));
	target1 += mul(i1, float4x4(-0.012262586, -0.014608243, -0.0039572082, 0.045586918, 0.011789637, 0.00811699, 0.004699602, -0.032348834, 0.017336411, 0.00069143757, 0.000303623, -0.061924953, -0.0064005707, -0.0043993946, -0.008697915, -0.012118654));
	target1 += mul(a2, float4x4(-0.0012260727, 0.006306051, -0.004919151, -0.014706935, 0.06893623, -0.03855539, 0.0025126948, -0.013461133, 0.051023327, -0.015535766, -0.0125827445, -0.059677888, -0.0021585734, -0.019920474, -0.025212945, 0.017173553));
	target1 += mul(b2, float4x4(-0.014818789, -0.004695369, 0.11874947, -0.025116654, -0.010446815, -0.015087738, 0.060040206, -0.053225394, -0.059700467, -0.0084348805, 0.11633143, 0.01912765, -0.046732634, 0.02437617, 0.014276953, -0.017528424));
	target1 += mul(c2, float4x4(0.03403683, 0.035661116, -0.05422196, 0.00086722866, 0.0069361166, 0.0030528181, 0.0011153776, 0.0040823813, -0.052100085, 0.016703505, -0.16275159, 0.019807467, -0.0046826405, -0.01290693, -0.00867241, -0.0074261916));
	target1 += mul(d2, float4x4(0.091117546, 0.050540023, -0.018510593, -0.007402161, -0.1193577, 0.018770888, -0.011340929, -0.02110343, -0.032088384, 0.010691935, 0.004420295, -0.025953075, 0.047472738, 0.108008265, 0.007997121, -0.03855365));
	target1 += mul(e2, float4x4(-0.21882823, -0.18101972, 0.13662423, 0.3109504, -0.101242945, 0.3064065, -0.22530204, 0.2612257, -0.07345098, 0.31937975, -0.15872811, 0.23400135, -0.04057178, -0.11676629, -0.34227282, -0.18310128));
	target1 += mul(f2, float4x4(-0.01088255, 0.026722692, -0.0071181543, -0.07676996, -0.054152276, -0.08521186, -0.029249348, 0.005593179, 0.012496848, -0.055432145, 0.06396825, 0.056608576, -0.006908986, 0.018192623, -0.027572934, 0.03749799));
	target1 += mul(g2, float4x4(-0.00788736, 0.032808263, -0.0034198891, -0.01124656, 0.014423269, 0.058434688, 0.0139339, 0.0024755867, 0.042650267, 0.01773591, 0.017099075, 0.00094137667, 0.033293027, 0.008411577, 0.018532667, 0.016402127));
	target1 += mul(h2, float4x4(0.0013495176, -0.05906597, -0.011892358, -0.04260839, 0.0040078545, -0.12263263, -0.005952629, -0.031151159, 0.009523005, -0.04784067, 0.07216081, 0.007988283, -0.010771301, -0.019751243, 0.017268918, -0.1053882));
	target1 += mul(i2, float4x4(0.021729292, -0.006699109, -0.017977247, -0.008347603, 0.030392287, -0.035512295, 0.047333952, -0.061986152, -0.00917743, -0.023669569, -0.051791556, -0.057909377, -0.008901611, -0.010565621, -0.022557132, -0.06957076));
	target1 += mul(na1, float4x4(-0.096115954, 0.013176027, -0.046984393, -0.0064583416, -0.13834997, -0.024369081, 0.049557988, -0.013092948, 0.10623086, -0.0071193436, 0.025198812, -0.00963305, -0.051104847, 0.009814798, 0.0050332784, 0.0058091953));
	target1 += mul(nb1, float4x4(0.03568169, 0.01623718, -0.0020163557, 0.043042913, 0.027783269, -0.06342661, 0.10441675, 0.031614527, -0.17076227, 0.07228563, 0.04167568, 0.022664918, 0.0002446228, 0.01977757, -0.14741875, 0.03596493));
	target1 += mul(nc1, float4x4(-0.028803155, 0.02343672, -0.037556753, 0.004386295, 0.023776755, -0.0024816473, 0.0017886858, -0.005105568, 0.008360341, -0.008270227, -0.12140172, 0.047693867, -0.03565588, -0.0082427105, 0.012581843, 0.0018308035));
	target1 += mul(nd1, float4x4(0.17737128, -0.23239174, 0.14191973, 0.0083567705, 0.022397157, -0.20152177, 0.076320365, 0.11157701, 0.005601583, -0.06157629, -0.060806494, 0.03030779, -0.17968388, -0.2081318, 0.051927045, 0.075377926));
	target1 += mul(ne1, float4x4(-0.28773892, -0.26089972, -0.13325682, -0.46006975, 0.35241324, 0.29463127, -0.16573308, 0.022810405, 0.388681, -0.036075145, 0.2998638, -0.15629162, 0.14321181, 0.10493886, -0.052218314, -0.27016288));
	target1 += mul(nf1, float4x4(0.03584634, 0.006315728, -0.08617273, -0.024391597, -0.016952977, 0.022077272, 0.12980743, 0.04512367, 0.003348057, 0.0946866, 0.16312122, 0.13436604, -0.011872978, -0.031965427, 0.0024880085, 0.033216927));
	target1 += mul(ng1, float4x4(0.016087456, 0.043138605, -0.028770814, 0.0061788377, 0.024897626, 0.10882443, -0.036830436, -0.009145524, -0.057872005, 0.08097352, -0.024710376, 0.0068731857, -0.018163942, -0.04771538, 0.027653048, 0.01914395));
	target1 += mul(nh1, float4x4(0.011542096, -0.073137596, 0.09102133, 0.049714323, -0.06767178, 0.070273116, -0.010473078, -0.120707616, -0.026583942, 0.0730171, -0.08226194, 0.105516605, 0.018596884, 0.05840729, 0.04693975, 0.0863541));
	target1 += mul(ni1, float4x4(0.0127724055, 0.02520005, -0.028792456, -0.06910211, -0.019357776, -0.026941938, 0.05015806, 0.12642363, -0.01354065, -0.015913904, 0.009398767, 0.034318734, -0.0034223567, -0.0146218045, -0.0067832484, -0.010091871));
	target1 += mul(na2, float4x4(-0.02916006, 0.014765165, 0.004575115, 0.0110705905, 0.024664888, 0.003658985, 0.0073659574, 0.0013673811, 0.02650946, 0.014014751, 0.026595473, 0.01877218, 0.016845545, -0.0031619575, -0.011036392, -0.014638798));
	target1 += mul(nb2, float4x4(0.012505482, 0.0023665216, -0.010882385, 0.009143886, -0.030671602, -0.004167823, 0.003649345, -0.00058618153, -0.038002256, -0.0061475867, -0.017000455, -0.015222981, 0.0066633034, 0.013324137, 0.022223728, 0.015254626));
	target1 += mul(nc2, float4x4(-0.019684946, -0.011194834, -0.011896193, -0.009636412, 0.0064974707, -0.018297167, -0.01162353, -0.00998448, 0.022304865, -0.0044090357, -0.0013151226, 0.009721475, -0.0029337434, 0.004208434, -0.008193774, 0.005379128));
	target1 += mul(nd2, float4x4(-0.012884837, -0.057319585, -0.002133779, -0.005586696, -0.03216661, 0.0015534499, -0.004120608, 0.0040779933, -0.044278033, 0.005608415, 0.009365155, 0.04694537, 0.024845028, 0.04563515, 0.018941263, 0.011450428));
	target1 += mul(ne2, float4x4(0.008597113, -0.010005085, -0.050961174, -0.07333081, 0.016683497, -0.056169543, -0.032008786, -0.037104256, -0.01117272, -0.011676191, -0.09071649, -0.049224474, 0.20027469, 0.06436799, 0.1351019, 0.069967836));
	target1 += mul(nf2, float4x4(0.022842692, 0.005048976, 0.05957191, 0.026581423, 0.03748738, 0.074060254, 0.053102568, 0.046449862, -0.013734466, -0.01722293, 0.030430514, -0.02180546, 0.007762467, -0.006432996, 0.08406507, 0.034061644));
	target1 += mul(ng2, float4x4(0.0048395037, 0.012762459, -0.0033284645, -0.0041399547, 0.01828778, 0.0043085683, 0.0019289661, -0.012415563, -0.023572162, -0.050695065, -0.013481175, -0.029202301, -0.03678883, -0.022862522, -0.025002036, -0.010764412));
	target1 += mul(nh2, float4x4(0.0075783907, 0.016249755, 0.0178703, 0.021285253, 0.013031193, 0.025416559, 0.043989707, 0.04750125, 0.0203218, 0.00335042, -0.024657877, -0.05417159, 0.0012374326, 0.115263805, -0.035001434, 0.049407292));
	target1 += mul(ni2, float4x4(0.0059729964, 0.017706383, 0.0004603757, 0.024557583, -0.014231813, 0.0022323965, -0.030447725, -0.005866556, 0.02305865, 0.02982909, 0.0549823, 0.06747715, -0.01014364, 0.0030060427, 0.01640388, 0.056874502));
	target1 += float4(0.0037637148, 0.003693704, 0.0034614028, 0.0033483643);

	float4 target2 = mul(a1, float4x4(-0.009785077, -0.007310227, 0.00081595866, -0.01268686, -0.014665477, -0.003956759, -0.0011089307, -0.011515727, 0.024502382, 0.025206817, 0.004246777, -0.0016346163, -0.016379429, -0.013535791, 0.01541915, 0.0095333215));
	target2 += mul(b1, float4x4(-0.017734146, 0.014389035, -0.0008451403, 0.013272096, 0.045607757, 0.01522117, 0.00904139, -0.001765619, 0.024920683, -0.012100507, 0.014870539, 0.0018603726, -0.030391455, 0.00632375, -0.055296343, -0.009885172));
	target2 += mul(c1, float4x4(0.0056769922, 0.0012991864, -0.014343983, 0.0073196087, 0.0061439234, -0.0009862045, 0.0323433, 0.0018582975, -0.00815158, -0.008821831, 0.016262496, -0.014280032, 0.024239268, 0.015745653, 0.016698766, 0.014503724));
	target2 += mul(d1, float4x4(0.039872967, -0.013257727, 0.055065673, 0.034231152, 0.086550154, 0.034081027, 0.045879394, 0.049891002, -0.011800151, -0.011743562, -0.015092318, -0.009334671, -0.017342495, -0.014658795, 0.014266523, 0.035314754));
	target2 += mul(e1, float4x4(-0.050990034, -0.06219798, -0.047669213, -0.07189862, -0.04856067, 0.031102043, 0.001354821, 0.01903025, 0.0037901315, 0.07694083, -0.016825065, 0.009997132, -0.18629807, -0.12768792, -0.104768254, -0.11861362));
	target2 += mul(f1, float4x4(0.017904822, 0.0042992756, 0.016748125, -0.025035992, -0.003724865, -0.0031921281, -0.019930473, 0.017328225, 0.024588963, 0.010205262, 0.04149686, 0.06978651, -0.022708472, -0.0057800277, -0.11644439, -0.06476094));
	target2 += mul(g1, float4x4(-0.02426752, -0.0034115477, -0.0015359819, 0.026405398, -0.013942422, 0.034148987, -0.009329464, -0.005556865, 0.010035298, 0.0042479886, -0.0045719417, -0.007970587, 0.0048700697, -0.0031006113, 0.005171075, 0.0020327016));
	target2 += mul(h1, float4x4(0.0015553721, -0.006999807, -0.027763836, -0.03493009, 0.0047000614, -0.034220867, 0.0021388065, 0.004188802, -0.007897541, -0.025793487, 0.017545879, 0.0013863312, 0.042826407, -0.050083816, 0.037378658, -0.011360738));
	target2 += mul(i1, float4x4(-0.007821516, -0.0034771548, 0.00051019643, 0.017586451, 0.01144453, 0.012032973, 0.0025295757, -0.011105711, 0.009102745, 0.015189803, -0.00083253905, -0.0025097867, -0.008002886, -0.020810502, -0.00023807488, -0.04825592));
	target2 += mul(a2, float4x4(0.005066405, 0.017425792, -0.0004840731, -0.0009944261, 0.07663847, -0.04755453, 0.004607992, -0.020050947, 0.021402068, -0.034427766, -0.0130948955, -0.042138048, 0.015383988, -0.0085578235, -0.036823586, 0.001125214));
	target2 += mul(b2, float4x4(-0.024459356, -0.019538784, 0.13201334, -0.025238393, -0.009611914, -0.017932015, 0.06330252, -0.05036921, -0.09405187, 0.0016108088, 0.07035366, -0.026231728, -0.036375783, 0.047566332, 0.033421457, 0.011572374));
	target2 += mul(c2, float4x4(0.03742729, 0.03181365, -0.05451164, -0.009032132, 0.017350135, -0.011311124, 0.0147211, -0.01298328, -0.011024085, 0.028534293, -0.12944345, 0.07152882, 0.005176979, -0.00048127733, -0.0063332263, -0.0034040876));
	target2 += mul(d2, float4x4(0.06455105, 0.033970848, -0.04488856, -0.027959615, -0.094514206, 0.033421617, 0.031325165, 0.0088970335, -0.031805996, 0.007078957, 0.008114225, -0.017701747, 0.048437405, 0.12445195, 0.02138049, -0.017392302));
	target2 += mul(e2, float4x4(-0.21116845, -0.17855385, 0.12160961, 0.32197994, -0.14490715, 0.2886178, -0.28124997, 0.21847156, -0.04988429, 0.32125694, -0.118747145, 0.26057142, -0.045630034, -0.1453716, -0.3682217, -0.22081932));
	target2 += mul(f2, float4x4(0.0057057277, 0.03872448, 0.020275556, -0.05959739, 0.0150841605, -0.02288727, 0.033048235, 0.08510421, 0.01309789, -0.050875448, 0.051518645, 0.041827686, -0.028529504, -0.0015568004, -0.023128182, 0.03178304));
	target2 += mul(g2, float4x4(0.0016438053, 0.028251547, 0.0003874817, -0.021485088, 0.008020942, 0.052520994, 0.009027988, 0.004729575, 0.026685065, 0.008003427, 0.013078419, -0.008256319, 0.022743277, -0.001293671, 0.018562315, 0.016649859));
	target2 += mul(h2, float4x4(0.013438089, -0.049052995, 0.0060880547, -0.044865325, 0.031890247, -0.102749884, 0.0047795745, -0.028551944, -0.018443404, -0.061510604, 0.031782348, -0.0005923042, 0.014257579, 0.010379952, 0.02929872, -0.090405114));
	target2 += mul(i2, float4x4(0.009318741, -0.0061841, -0.02420737, 0.0018885462, 0.022010826, -0.023001686, 0.035959963, -0.057635445, 0.012495818, -0.008206369, -0.026234211, -0.04719263, 0.0057711657, -0.003004966, 0.0046920753, -0.041684203));
	target2 += mul(na1, float4x4(-0.050602015, 0.021741746, -0.059019636, -0.008416951, -0.1789153, -0.01835426, 0.03100039, -0.017736796, 0.09091737, -0.026542341, 0.010933376, -0.031898204, -0.015792761, 0.013789206, 0.031699985, 0.018964434));
	target2 += mul(nb1, float4x4(0.099863164, -0.01637541, 0.083744444, 0.011983074, 0.013478042, -0.04780451, 0.08646149, 0.050255097, -0.22476238, 0.11746969, 0.038574144, 0.069615066, 0.047265753, -0.03212485, -0.12651724, -0.0065722666));
	target2 += mul(nc1, float4x4(-0.026888395, 0.0053314343, -0.0018114679, -0.007841625, 0.00037234774, -0.005450839, -0.03730409, -0.00441375, -0.014338566, 0.002887282, -0.19375902, 0.06374498, -0.033998128, -0.03480894, 0.061709825, -0.016935369));
	target2 += mul(nd1, float4x4(0.18882285, -0.19729713, 0.064650975, -0.07342598, -0.039107442, -0.28614163, 0.081506595, 0.111678764, -0.0019596675, -0.071805045, -0.019774346, 0.055490687, -0.1405711, -0.16753702, 0.031397972, 0.054546997));
	target2 += mul(ne1, float4x4(-0.007561914, 0.0010002917, 0.12623467, -0.17501056, 0.22664371, 0.2080332, -0.3194733, -0.1065412, 0.21299458, -0.23856679, 0.17237303, -0.2863369, 0.35997602, 0.354653, 0.15091361, -0.07142766));
	target2 += mul(nf1, float4x4(0.02403396, 0.0037063402, -0.004992154, 0.047530055, -0.03227084, -0.0055595553, 0.06554937, -0.025955811, -0.03792351, 0.041418597, 0.04285587, -0.0118592, 0.00012291886, -0.013734975, 0.07748641, 0.14016038));
	target2 += mul(ng1, float4x4(0.015037119, 0.058259863, -0.020877289, -0.0059153647, 0.04133679, 0.108832926, -0.026314106, -0.0010898053, -0.057873078, 0.07802038, -0.029681025, 0.020011986, -0.03940851, -0.038397703, 0.013701823, 0.01657068));
	target2 += mul(nh1, float4x4(-0.016823404, 0.007905321, 0.034658395, 0.09977579, -0.05916761, 0.004779212, 0.018820778, -0.15795219, -0.013125517, 0.021101758, -0.055992976, 0.08024182, -0.04333755, 0.070356764, -0.030624833, 0.09123745));
	target2 += mul(ni1, float4x4(-0.007931201, 0.0069976873, -0.016831044, -0.027368804, -0.03332386, -0.041667387, 0.04094055, 0.095304705, -0.006027611, -0.019209528, -0.0008929939, -0.017201519, 0.005464988, 0.0038448595, -0.01248845, 0.008877873));
	target2 += mul(na2, float4x4(-0.042160366, 0.0036025376, -0.008628986, -0.005607383, 0.028637825, 0.005296032, -0.0004143198, 0.008265197, 0.033176135, 0.014727739, 0.0145593295, 0.011159069, 0.00833305, -0.0025515268, -0.00015546188, 0.002805437));
	target2 += mul(nb2, float4x4(0.016752163, 0.013423374, -0.018342504, 0.013459657, -0.038428728, -0.005804395, 0.019692563, -0.005745392, -0.030070104, 0.01058409, 0.003989377, 0.0074200635, -0.01936366, -0.01608809, 0.0071134195, -0.0038598357));
	target2 += mul(nc2, float4x4(-0.018000437, -0.0121247275, -0.01288339, -0.0060898345, -0.006138964, -0.0035810755, -0.03902352, 0.002276941, 0.0032195079, -0.02730975, -0.011268412, -0.0036179612, -0.004836894, -0.0015986725, -0.019751905, -0.0071931942));
	target2 += mul(nd2, float4x4(0.014426659, -0.05161329, 0.019196855, 0.002317663, -0.055477437, -0.007086505, -0.04151144, -0.027518485, -0.027440753, 0.003857541, -0.002143262, 0.013090804, 0.015745236, 0.021075105, 7.93909e-06, -0.009694458));
	target2 += mul(ne2, float4x4(0.0025894733, -0.017304689, -0.03299281, -0.0754248, 0.03428733, -0.03397887, 0.0108591765, 0.021311574, -0.04203291, -0.019728655, -0.09826257, -0.046157785, 0.22522739, 0.086717755, 0.15654634, 0.08489247));
	target2 += mul(nf2, float4x4(0.008495083, 0.00074552774, 0.038054205, 0.013044046, -0.027891211, 0.003249458, -0.018353004, -0.035205863, -0.010195661, -0.008145831, 0.014239584, -0.019779535, 0.011452498, 0.004117014, 0.08403766, 0.04357078));
	target2 += mul(ng2, float4x4(0.00020427872, 0.026861027, -0.01047743, 0.0034385168, 0.015686916, 0.00038722693, 0.0017860534, -0.021630246, -0.0084784245, -0.022648407, -0.0050631054, -0.016437376, -0.026458954, -0.011239073, -0.01145464, -0.0058855377));
	target2 += mul(nh2, float4x4(-0.0012052609, 0.009248192, 0.008875674, 0.03043022, 0.012489936, 0.019402692, 0.0378006, 0.05519605, 0.029059285, -0.0072894073, 0.0014154738, -0.03802288, -0.02321437, 0.09558396, -0.0550932, 0.036936663));
	target2 += mul(ni2, float4x4(0.010010094, 0.012796987, 0.0025080708, 0.013876455, -0.00536739, -0.016932324, -0.012128944, -0.0241354, 0.0077782627, 0.01584833, 0.033727348, 0.039302748, -0.026609577, -0.0062910756, -0.011042692, 0.031207075));
	target2 += float4(-0.0009249668, -0.0010178088, -0.00041991958, -0.0005421036);

	float4 target3 = mul(a1, float4x4(-0.01766077, -0.017591428, -0.0038036762, -0.023304595, -0.012525157, -0.0058148014, -0.0030130956, -0.011804012, 0.030511979, 0.028687771, 0.007858589, 0.004475508, -0.02585795, -0.01785211, 0.0053741997, 0.00074623496));
	target3 += mul(b1, float4x4(-0.040601525, 0.016486213, -0.01966552, 0.014969501, 0.05400945, 0.019022502, 0.0149923405, -0.0017570893, 0.040684238, -0.009271634, 0.026908487, 0.002365157, -0.03371985, 0.00928091, -0.058665182, -0.0047038617));
	target3 += mul(c1, float4x4(0.0034900296, 0.0028777388, -0.02543823, 0.005724228, 0.012073974, 0.0043754885, 0.04109826, 0.008040286, -0.00049979525, -0.0063444753, 0.030565983, -0.009352674, 0.01949427, 0.014168137, 0.009640578, 0.011481213));
	target3 += mul(d1, float4x4(0.026645018, -0.02211462, 0.06119815, 0.039082125, 0.09945218, 0.042240527, 0.054267537, 0.04693634, -0.004510591, -0.0041247807, -0.012629442, -0.008053095, -0.025141539, -0.025081929, 0.011338651, 0.029372308));
	target3 += mul(e1, float4x4(-0.102688424, -0.11533188, -0.09621349, -0.116714895, -0.025504943, 0.05013811, 0.024331303, 0.03946124, 0.026381869, 0.1011479, -0.0017481856, 0.027152762, -0.18783632, -0.13439077, -0.112003446, -0.12810163));
	target3 += mul(f1, float4x4(0.010783576, -0.00025257064, -0.0075445045, -0.04681932, -0.0021722934, -0.005758047, -0.0110701695, 0.023468157, 0.036986902, 0.023351438, 0.063143626, 0.09269854, -0.025713218, -0.011750105, -0.11722637, -0.07038934));
	target3 += mul(g1, float4x4(-0.026961634, -0.015106367, -0.0034014166, 0.02482031, -0.013892242, 0.04203608, -0.008226002, 0.004619446, 0.012888606, 0.010721662, -1.3880494e-05, -0.0033224574, 0.006727405, -0.0035630877, 0.0021499102, -0.00091816986));
	target3 += mul(h1, float4x4(0.0016877668, -0.02695227, -0.023388471, -0.053411417, 0.006777518, -0.024251794, 0.0015210172, 0.010034961, -0.00795588, -0.01645489, 0.012691467, 0.0061330614, 0.054507505, -0.041002143, 0.048495438, -0.004843492));
	target3 += mul(i1, float4x4(-0.0159168, -0.013163069, -0.0091357315, 0.0011109188, 0.022993349, 0.025777856, 0.013487494, 0.00304372, 0.014121591, 0.02415322, 0.006453722, 0.010679647, -0.00626483, -0.017908117, 0.0063728937, -0.04091484));
	target3 += mul(a2, float4x4(-0.0026799496, 0.0154166315, -0.0037383793, -0.002577431, 0.073905826, -0.043148544, 0.011774636, -0.016023275, 0.0099145975, -0.04718069, -0.013578048, -0.04220935, 0.018033838, -0.0025958812, -0.029762078, 0.0034059538));
	target3 += mul(b2, float4x4(-0.03239311, -0.025743088, 0.1116615, -0.027325295, -0.014691433, -0.013614988, 0.05034416, -0.04294835, -0.11013415, -0.014086726, 0.048601545, -0.04762435, -0.01944709, 0.054276068, 0.04073586, 0.019288493));
	target3 += mul(c2, float4x4(0.027851144, 0.014083208, -0.06432852, -0.024642657, 0.021185134, -0.015441491, 0.018058551, -0.017353412, -0.018814132, 0.026259383, -0.14238997, 0.06301044, 0.007324441, 0.00494394, 0.00020533071, 0.0024405916));
	target3 += mul(d2, float4x4(0.06092095, 0.025730716, -0.042129956, -0.026382709, -0.08284398, 0.03344148, 0.038016047, 0.0137958275, -0.025555719, 0.008199355, 0.0070835026, -0.01420561, 0.0493976, 0.121205755, 0.026178997, -0.006300481));
	target3 += mul(e2, float4x4(-0.18660638, -0.1658202, 0.116562665, 0.29287666, -0.13814074, 0.2658047, -0.270531, 0.19597577, -0.04692207, 0.28904793, -0.09829146, 0.24158104, -0.03946344, -0.12598358, -0.3361825, -0.19800447));
	target3 += mul(f2, float4x4(0.020092675, 0.049266458, 0.03696139, -0.046251137, 0.029122403, -0.008378672, 0.044602558, 0.092563495, -0.0036082428, -0.072675824, 0.030523287, 0.006169521, -0.031133244, -0.011250458, -0.026590217, 0.023079094));
	target3 += mul(g2, float4x4(0.007384019, 0.031913586, 0.002072675, -0.019807052, 0.010384438, 0.050076224, 0.010438329, 0.009595051, 0.022497892, 0.012009176, 0.009222753, -0.008563874, 0.017106988, -0.003105622, 0.01070336, 0.011805944));
	target3 += mul(h2, float4x4(0.017091183, -0.035133313, 0.012425838, -0.03395959, 0.03418688, -0.10616231, 0.0101681305, -0.03682252, -0.016497994, -0.05231084, 0.025178006, 0.008926557, 0.025942912, 0.019970346, 0.03534238, -0.07596637));
	target3 += mul(i2, float4x4(0.007215777, -0.0006424821, -0.020822426, 0.011314772, 0.0183502, -0.015352454, 0.02972497, -0.053287935, 0.024020335, -0.006380922, -0.008620774, -0.041896872, 0.021631774, 0.013320375, 0.024711635, -0.020357909));
	target3 += mul(na1, float4x4(-0.033131246, 0.027936278, -0.047840517, 0.0019488486, -0.17501047, -0.0178374, 0.02549812, -0.019010937, 0.079489246, -0.027291514, 0.004313802, -0.03478066, -0.004887971, 0.019281879, 0.04073947, 0.022658588));
	target3 += mul(nb1, float4x4(0.110482916, -0.021340236, 0.09848104, 0.0034104201, 0.0032655075, -0.04557326, 0.07156056, 0.045965493, -0.22822224, 0.115162075, 0.027745042, 0.07251069, 0.05100454, -0.034554593, -0.11214564, -0.009064197));
	target3 += mul(nc1, float4x4(-0.017621655, 0.01024623, 0.009554872, -0.00078690174, -0.0069463328, -0.014670676, -0.041410644, -0.007414249, -0.031177497, -0.007517117, -0.20814678, 0.049873244, -0.02482445, -0.031338003, 0.06920326, -0.015171424));
	target3 += mul(nd1, float4x4(0.18918292, -0.15450309, 0.05504167, -0.061840136, -0.057958793, -0.28908864, 0.06820344, 0.09923399, -0.008387437, -0.075379215, -0.01747373, 0.048925415, -0.13222353, -0.15354146, 0.022480693, 0.04943612));
	target3 += mul(ne1, float4x4(0.0469381, 0.05393423, 0.1681062, -0.10543653, 0.17948511, 0.16570628, -0.33344334, -0.13197891, 0.16509773, -0.26174626, 0.13757275, -0.29244694, 0.35424834, 0.35368237, 0.156861, -0.04775442));
	target3 += mul(nf1, float4x4(0.026892537, 0.0075510717, 0.015918663, 0.06070227, -0.02288592, 0.0027507204, 0.05279965, -0.03042772, -0.044760384, 0.0234673, 0.01604264, -0.04277388, 0.0011313064, -0.0052253264, 0.08374709, 0.14929597));
	target3 += mul(ng1, float4x4(0.016119812, 0.061383534, -0.013537205, -0.0017921093, 0.043676157, 0.09811408, -0.015655283, 0.0007943268, -0.053843908, 0.069290705, -0.028319253, 0.020141726, -0.038996387, -0.03628716, 0.012679114, 0.015012319));
	target3 += mul(nh1, float4x4(-0.02019775, 0.022393003, 0.020688228, 0.10277296, -0.06365119, -0.015666502, 0.012721399, -0.16204305, -0.0037819904, 0.012113873, -0.040969223, 0.069086574, -0.052415807, 0.060331605, -0.04201384, 0.07953157));
	target3 += mul(ni1, float4x4(-0.0019123453, 0.012750492, -0.007235785, -0.01268919, -0.038674437, -0.043993857, 0.028753003, 0.07664717, -0.015077012, -0.027486047, -0.011141094, -0.030269727, 0.0016567699, -0.003331901, -0.021631587, -0.00040226072));
	target3 += mul(na2, float4x4(-0.03769701, 0.0045639244, -0.0069983527, -0.0064906892, 0.03318896, 0.011733902, 0.0023203227, 0.013374876, 0.037507236, 0.018019466, 0.013330661, 0.009231364, 0.00018865235, -0.005706915, -0.00011657552, 0.0038968239));
	target3 += mul(nb2, float4x4(0.022072105, 0.019486066, -0.013029048, 0.017470635, -0.03662149, -0.011397823, 0.02397534, -0.008561204, -0.026196644, 0.01626692, 0.011886567, 0.021061733, -0.03310679, -0.025446283, -0.006469576, -0.010118362));
	target3 += mul(nc2, float4x4(-0.014853227, -0.0062806485, -0.005624992, 0.0017175867, -0.007843849, 0.0008925535, -0.041000694, 0.0049381475, 0.0019743184, -0.035099152, -0.01074269, -0.0128827905, -0.010841019, -0.0093286475, -0.030476939, -0.018505717));
	target3 += mul(nd2, float4x4(0.016344415, -0.04647131, 0.021242643, 0.004836572, -0.061090752, -0.006488986, -0.050970413, -0.029668579, -0.015889898, 0.010811246, 0.0018357672, 0.012481409, 0.008317143, 0.009978102, -0.0015472731, -0.011174326));
	target3 += mul(ne2, float4x4(-0.004087798, -0.01634328, -0.031607483, -0.068488315, 0.038035624, -0.02797923, 0.017972443, 0.029961389, -0.029277585, -0.015558678, -0.08634699, -0.039436456, 0.19870138, 0.06507983, 0.130592, 0.059745777));
	target3 += mul(nf2, float4x4(-0.0028183246, -0.008089249, 0.02188247, 0.0049699014, -0.03830487, -0.0079993615, -0.028960107, -0.045729056, 0.0021651732, 0.010072074, 0.031335246, 0.0012719089, 0.015795005, 0.011290197, 0.08071912, 0.04273827));
	target3 += mul(ng2, float4x4(-0.0011167483, 0.024682038, -0.009224286, 0.005520499, 0.014198537, -0.0032909375, 0.0005767499, -0.02676088, -0.0019766665, -0.015222206, -0.00080782827, -0.011807755, -0.02560086, -0.015391911, -0.008948504, -0.0062184683));
	target3 += mul(nh2, float4x4(-0.009399661, -0.0019192873, 0.000261681, 0.020112153, 0.0077712294, 0.019477246, 0.030144244, 0.053777162, 0.030650103, 0.0021887033, 0.0092345085, -0.029658241, -0.03723785, 0.073152155, -0.058525253, 0.0230170564));
	target3 += mul(ni2, float4x4(0.012911211, 0.010375983, -0.00055489264, 0.005504194, -0.004187377, -0.02239082, -0.008734182, -0.027458502, -0.005602922, 0.009588401, 0.015889015, 0.036346428, -0.038325973, -0.018252429, -0.02944341, 0.01149068));
	target3 += float4(-0.0021447246, -0.0025527438, -0.0016466968, -0.0020245572);

	float2 outputPt = GetOutputPt();

	pos -= 0.5f * outputPt;
	OUTPUT[gxy] = float4(float3(target1.x, target2.x, target3.x) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1);

	++gxy.x;
	pos.x += outputPt.x;
	OUTPUT[gxy] = float4(float3(target1.y, target2.y, target3.y) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1);
	
	++gxy.y;
	pos.y += outputPt.y;
	OUTPUT[gxy] = float4(float3(target1.w, target2.w, target3.w) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1);

	--gxy.x;
	pos.x -= outputPt.x;
	OUTPUT[gxy] = float4(float3(target1.z, target2.z, target3.z) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1);
}
